# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 20_party_id.R
### This script creates figures related to the party ID of subjects.
# ----
# Load and recode data ----
# Read the baseline party ID workbook into party_data.
# NOTE(review): no library() call is visible in this file; presumably a master
# script attaches readxl and the tidyverse before this one runs -- confirm.
party_data <- read_excel(path = "./data/raw/baseline/party_id.xlsx")

# Create standardized party categories for elections
# Both election columns go through the same recode: the five listed raw
# answers receive a display label, any other non-empty answer becomes
# "Others", and missing or empty answers stay NA. The results are appended as
# state_election_2020_party_new and national_election_2019_party_new.
party_data <- party_data %>%
  mutate(
    across(
      c(state_election_2020_party, national_election_2019_party),
      function(raw_answer) {
        known_codes <- c("bjp", "rjd", "congress", "jdu", "bjp rjd")
        known_labels <- c("BJP", "RJD", "INC", "JDU", "BJP/RJD")
        # match() is exact, so only answers equal to a listed code get a label
        matched_label <- known_labels[match(raw_answer, known_codes)]
        case_when(
          !is.na(matched_label) ~ matched_label,
          !is.na(raw_answer) & raw_answer != "" ~ "Others",
          TRUE ~ NA_character_
        )
      },
      .names = "{.col}_new"
    )
  )


# Create bar chart for national election preferences ----
# Percentage of rows in each standardized national-election category, computed
# over rows whose category is neither missing nor empty, largest share first.
national_summary <- party_data %>%
  filter(
    !is.na(national_election_2019_party_new),
    national_election_2019_party_new != ""
  ) %>%
  group_by(national_election_2019_party_new) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(percent = n / sum(n) * 100) %>%
  arrange(desc(percent))

# Bars are placed in increasing order of percent (fct_reorder's default).
national_plot <- ggplot(
  national_summary,
  aes(
    x = fct_reorder(national_election_2019_party_new, percent),
    y = percent
  )
) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  labs(title = "", x = "", y = "Percent") +
  # %+replace% binds tighter than +, so these parentheses only make explicit
  # that the overrides are applied to theme_bw() before it is added to the plot.
  (
    theme_bw() %+replace%
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
        axis.text.y = element_text(angle = 0),
        panel.grid = element_blank(),
        plot.background = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(color = "black")
      )
  )

# Save national preference plot
ggsave(
  filename = "./output/figures/national_preference.pdf",
  plot = national_plot,
  width = 8,
  height = 6,
  dpi = 300
)


# Create bar chart for state election preferences ----
# Percentage of rows in each standardized state-election category, computed
# over rows whose category is neither missing nor empty, largest share first.
state_summary <- party_data %>%
  filter(
    !is.na(state_election_2020_party_new),
    state_election_2020_party_new != ""
  ) %>%
  group_by(state_election_2020_party_new) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(percent = n / sum(n) * 100) %>%
  arrange(desc(percent))

# Bars are placed in increasing order of percent (fct_reorder's default).
state_plot <- ggplot(
  state_summary,
  aes(
    x = fct_reorder(state_election_2020_party_new, percent),
    y = percent
  )
) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  labs(title = "", x = "", y = "Percent") +
  # %+replace% binds tighter than +, so these parentheses only make explicit
  # that the overrides are applied to theme_bw() before it is added to the plot.
  (
    theme_bw() %+replace%
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
        axis.text.y = element_text(angle = 0),
        panel.grid = element_blank(),
        plot.background = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(color = "black")
      )
  )

# Save state preference plot
ggsave(
  filename = "./output/figures/state_preference.pdf",
  plot = state_plot,
  width = 8,
  height = 6,
  dpi = 300
)

# Create stacked bar chart for party preferences by caste ----
# Create vote counting variables
# Each election answer is turned into three 0/1 indicators: BJP mentioned, RJD
# mentioned, or neither ("other"). An answer that mentions both parties (such
# as "bjp rjd") counts once for each of them.
#
# Missing or empty answers contribute 0 to every indicator. str_detect()
# returns NA for a missing answer, and NA + 1 is NA, so without the !is.na()
# guards a row with only one election answered would get NA totals and its
# valid answer would be dropped by the later sum(..., na.rm = TRUE) calls.
# Empty strings are excluded from "other" to match the party recode above,
# which also treats "" as missing.
party_data <- party_data %>%
  mutate(
    # Count BJP mentions
    bjp_national_votes = as.numeric(
      !is.na(national_election_2019_party) &
        str_detect(national_election_2019_party, "bjp")
    ),
    bjp_state_votes = as.numeric(
      !is.na(state_election_2020_party) &
        str_detect(state_election_2020_party, "bjp")
    ),

    # Count RJD mentions
    rjd_national_votes = as.numeric(
      !is.na(national_election_2019_party) &
        str_detect(national_election_2019_party, "rjd")
    ),
    rjd_state_votes = as.numeric(
      !is.na(state_election_2020_party) &
        str_detect(state_election_2020_party, "rjd")
    ),

    # Count other party mentions (an answer was given, neither BJP nor RJD)
    other_national_votes = as.numeric(
      !is.na(national_election_2019_party) &
        national_election_2019_party != "" &
        bjp_national_votes == 0 &
        rjd_national_votes == 0
    ),
    other_state_votes = as.numeric(
      !is.na(state_election_2020_party) &
        state_election_2020_party != "" &
        bjp_state_votes == 0 &
        rjd_state_votes == 0
    ),

    # Combine votes for each party across the two elections (0, 1 or 2)
    bjp_total_votes = bjp_national_votes + bjp_state_votes,
    rjd_total_votes = rjd_national_votes + rjd_state_votes,
    other_total_votes = other_national_votes + other_state_votes
  )

# Caste-level party shares, kept in a separate data frame so party_data itself
# is left unchanged (this mirrors a preserve / collapse (sum) / gsort workflow).

# Display names for jati codes; codes that are not listed are shown as-is.
jati_display_names <- c(
  "bc1-6" = "Kurmi",
  "bc1-94" = "Julaha",
  "bc2-20" = "Baniya",
  "bc2-22" = "Yadav",
  "gn-1" = "Brahmin",
  "gn-2" = "Bhumihar",
  "gn-3" = "Rajput",
  "gn-4" = "Kayastha", # previously misspelled "Kayashta"
  "sc-11" = "Paswan",
  "sc-6" = "Chamar",
  "bc2-4" = "Kushwaha",
  "bc1-38" = "Dhanuk"
)

# NOTE(review): group_by() keeps rows with a missing jati_new together as one
# group, which can enter the top 10 -- confirm that this is intended.
caste_analysis <- party_data %>%
  # Sum the vote indicators within each jati
  group_by(jati_new) %>%
  summarise(
    across(
      c(bjp_total_votes, rjd_total_votes, other_total_votes),
      function(votes) sum(votes, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  # Each party's share of the jati's total votes, in percent
  mutate(
    total_votes = bjp_total_votes + rjd_total_votes + other_total_votes,
    bjp_percent = (bjp_total_votes / total_votes) * 100,
    rjd_percent = (rjd_total_votes / total_votes) * 100,
    other_percent = (other_total_votes / total_votes) * 100
  ) %>%
  # Keep the 10 communities with the most votes
  arrange(desc(total_votes)) %>%
  slice_head(n = 10) %>%
  # Plotting ID: -1 for the largest community down to -10
  mutate(id = -row_number()) %>%
  # Select only needed variables
  select(bjp_percent, rjd_percent, other_percent, jati_new, id) %>%
  # Swap known codes for display names, leaving every other value unchanged
  mutate(
    jati_new = coalesce(
      unname(jati_display_names[match(jati_new, names(jati_display_names))]),
      jati_new
    )
  )

# Reshape to long format for stacked bar chart
# The three percent columns are first renamed to their legend labels and then
# stacked, giving one row per jati and party with the share in `percent`.
caste_long <- caste_analysis %>%
  mutate(jati_new = str_to_title(jati_new)) %>%
  rename(
    BJP = bjp_percent,
    RJD = rjd_percent,
    Other = other_percent
  ) %>%
  pivot_longer(
    cols = c(BJP, RJD, Other),
    names_to = "party",
    values_to = "percent"
  )

# Create stacked bar chart for party preferences by caste
# One stacked bar per jati; bars are placed in increasing order of id
# (fct_reorder's default), and the fill colour identifies the party.
caste_plot <- ggplot(
  caste_long,
  aes(x = fct_reorder(jati_new, id), y = percent, fill = party)
) +
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "",
    breaks = c("BJP", "RJD", "Other"),
    values = c(BJP = "#0066CC", RJD = "#FF0000", Other = "#228B22")
  ) +
  labs(title = "", x = "", y = "Percent") +
  # %+replace% binds tighter than +, so these parentheses only make explicit
  # that the overrides are applied to theme_bw() before it is added to the plot.
  (
    theme_bw() %+replace%
      theme(
        axis.text.x = element_text(size = 12, angle = 60),
        axis.text.y = element_text(size = 12),
        axis.ticks.x = element_blank(),
        legend.position = "right",
        panel.grid = element_blank(),
        plot.background = element_blank(),
        panel.border = element_blank()
      )
  )

# Save caste preference plot
ggsave(
  filename = "./output/figures/party_preference.pdf",
  plot = caste_plot,
  width = 10,
  height = 8,
  dpi = 300
)

# Create stacked bar chart for party preferences by library ----
# The library code is taken to be the first 3 characters of village_id; the
# vote indicators are summed within each code and converted to percent shares.
library_analysis <- party_data %>%
  group_by(library = str_sub(village_id, start = 1, end = 3)) %>%
  summarise(
    across(
      c(bjp_total_votes, rjd_total_votes, other_total_votes),
      function(votes) sum(votes, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  # Each party's share of the library's total votes, in percent
  mutate(
    total_votes = bjp_total_votes + rjd_total_votes + other_total_votes,
    bjp_percent = (bjp_total_votes / total_votes) * 100,
    rjd_percent = (rjd_total_votes / total_votes) * 100,
    other_percent = (other_total_votes / total_votes) * 100
  )

# Reshape library data to long format for plotting
# The three percent columns are first renamed to their legend labels and then
# stacked, giving one row per library and party with the share in `percent`.
library_long <- library_analysis %>%
  rename(
    BJP = bjp_percent,
    RJD = rjd_percent,
    Other = other_percent
  ) %>%
  pivot_longer(
    cols = c(BJP, RJD, Other),
    names_to = "party",
    values_to = "percent"
  )

# Create stacked bar chart for party preferences by library
# One stacked bar per library code; the x-axis tick labels and tick marks are
# blanked out, so individual libraries are not identified in the figure.
library_plot <- ggplot(
  library_long,
  aes(x = library, y = percent, fill = party)
) +
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "",
    breaks = c("BJP", "RJD", "Other"),
    values = c(BJP = "#0066CC", RJD = "#FF0000", Other = "#228B22")
  ) +
  labs(title = "", x = "Libraries", y = "Percent") +
  # %+replace% binds tighter than +, so these parentheses only make explicit
  # that the overrides are applied to theme_bw() before it is added to the plot.
  (
    theme_bw() %+replace%
      theme(
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 12),
        axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14, angle = 90),
        axis.ticks.x = element_blank(),
        legend.position = "right",
        panel.grid = element_blank(),
        plot.background = element_blank(),
        panel.border = element_blank()
      )
  )

# Save library preference plot
ggsave(
  filename = "output/figures/party_preference_library.pdf",
  plot = library_plot,
  width = 12,
  height = 6,
  dpi = 300
)

# END of 20_party_id.R ----